The aim is to characterize the human fetal kidney from the kidney
cell atlas. You can find more about the human kidney atlas here: https://www.kidneycellatlas.org/ [1]. The rds data can be
downloaded using the link https://datasets.cellxgene.cziscience.com/40ebb8e4-1a25-4a33-b8ff-02d1156e4e9b.rds
The Azimuth-compatible reference was downloaded and created by the
R script
download-and-create-fetal-kidney-ref.R
Load required packages in the following chunk, if needed. Do not
install packages here; only load them with the library()
function.
The input file is the output of the R script
download-and-create-fetal-kidney-ref.R
# Load the fetal kidney reference object from disk.
fetal_kidney <- readRDS(path_to_data)

# Draw a labelled, legend-free UMAP coloured by one metadata column.
umap_by <- function(grouping) {
  do_DimPlot(
    fetal_kidney,
    reduction = "umap",
    dims = c(1, 2),
    group.by = grouping,
    label = TRUE,
    repel = TRUE
  ) +
    NoLegend()
}

d1 <- umap_by("compartment")
d2 <- umap_by("cell_type")
d1 | d2
Here, we use an unbiased approach to find transcripts that characterize the different compartments and cell types.
This is just to get marker genes for the different populations, in case some are of interest for the Wilms tumor annotations.
We run DElegate::FindAllMarkers2 to find markers of the different clusters and manually check whether they make sense. DElegate::FindAllMarkers2 is an improved version of Seurat::FindAllMarkers based on a pseudobulk differential expression method. Please check the preprint from Christoph Hafemeister: https://www.biorxiv.org/content/10.1101/2023.03.28.534443v1 and the tool described here: https://github.com/cancerbits/DElegate
## Warning in size + sum(size_args, na.rm = FALSE): NAs produced by integer
## overflow
# Filter the most relevant markers: keep rows of `de_results` that pass the
# padj, log_fc and rate1 thresholds supplied in `params`.
# which() drops rows where the condition is NA; a bare logical index would
# return an all-NA row for each of them and display it in the table below.
is_marker <- de_results$padj < params$padj_threshold &
  de_results$log_fc > params$lfc_threshold &
  de_results$rate1 > params$rate1_threshold
s.markers <- de_results[which(is_marker), ]

# Interactive table of the retained markers with CSV / Excel export buttons.
DT::datatable(
  s.markers,
  caption = "marker genes",
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel")
  )
)

# Select top 5 genes for heatmap plotting
# (top_n() keeps ties, so a group can contribute more than 5 genes).
s.markers <- na.omit(s.markers)
top5 <- s.markers %>%
  group_by(group1) %>%
  top_n(n = 5, wt = log_fc) %>%
  ungroup()

# Subset for plotting: at most 100 cells per compartment, scaled only on the
# genes that will be shown in the heatmap.
Idents(fetal_kidney) <- fetal_kidney$compartment
cells <- WhichCells(fetal_kidney, downsample = 100)
ss <- subset(fetal_kidney, cells = cells)
ss <- ScaleData(ss, features = top5$feature)

# p1: UMAP of all cells, p2: marker heatmap on the downsampled cells,
# p3: number of cells per compartment.
p1 <- SCpubr::do_DimPlot(
  fetal_kidney,
  reduction = "umap",
  group.by = "compartment",
  label = TRUE,
  repel = TRUE
) +
  ggtitle("compartment")
p2 <- DoHeatmap(
  ss,
  features = top5$feature,
  cells = cells,
  group.by = "compartment"
) +
  NoLegend() +
  scale_fill_gradientn(
    colors = c(
      "#01665e", "#35978f", "darkslategray3", "#f7f7f7",
      "#fee391", "#fec44f", "#F9AD03"
    )
  )
p3 <- ggplot(fetal_kidney@meta.data, aes(compartment, fill = compartment)) +
  geom_bar() +
  NoLegend()

common_title <- sprintf(
  "Unsupervised clustering %s, %d cells",
  fetal_kidney@meta.data$orig.ident[1],
  ncol(fetal_kidney)
)

# Left column: UMAP above the bar plot; right column: heatmap.
show(
  (((p1 / p3) + plot_layout(heights = c(3, 2)) | p2)) +
    plot_layout(widths = c(1, 2)) +
    plot_layout(heights = c(3, 1)) +
    plot_annotation(title = common_title)
)
## Warning in size + sum(size_args, na.rm = FALSE): NAs produced by integer
## overflow
# Filter the most relevant markers: keep rows of `de_results` that pass the
# padj, log_fc and rate1 thresholds supplied in `params`.
# which() drops rows where the condition is NA; a bare logical index would
# return an all-NA row for each of them and display it in the table below.
is_marker <- de_results$padj < params$padj_threshold &
  de_results$log_fc > params$lfc_threshold &
  de_results$rate1 > params$rate1_threshold
s.markers <- de_results[which(is_marker), ]

# Interactive table of the retained markers with CSV / Excel export buttons.
DT::datatable(
  s.markers,
  caption = "marker genes",
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel")
  )
)

# Select top 5 genes for heatmap plotting
# (top_n() keeps ties, so a group can contribute more than 5 genes).
s.markers <- na.omit(s.markers)
top5 <- s.markers %>%
  group_by(group1) %>%
  top_n(n = 5, wt = log_fc) %>%
  ungroup()

# Subset for plotting: at most 100 cells per cell type, scaled only on the
# genes that will be shown in the heatmap.
Idents(fetal_kidney) <- fetal_kidney$cell_type
cells <- WhichCells(fetal_kidney, downsample = 100)
ss <- subset(fetal_kidney, cells = cells)
ss <- ScaleData(ss, features = top5$feature)

# p1: UMAP of all cells, p2: marker heatmap on the downsampled cells,
# p3: number of cells per cell type (x labels rotated to stay readable).
p1 <- SCpubr::do_DimPlot(
  fetal_kidney,
  reduction = "umap",
  group.by = "cell_type",
  label = TRUE,
  repel = TRUE
) +
  ggtitle("cell_type") +
  NoLegend()
p2 <- DoHeatmap(
  ss,
  features = top5$feature,
  cells = cells,
  group.by = "cell_type"
) +
  NoLegend() +
  scale_fill_gradientn(
    colors = c(
      "#01665e", "#35978f", "darkslategray3", "#f7f7f7",
      "#fee391", "#fec44f", "#F9AD03"
    )
  )
p3 <- ggplot(fetal_kidney@meta.data, aes(cell_type, fill = cell_type)) +
  geom_bar() +
  NoLegend() +
  scale_x_discrete(guide = guide_axis(angle = 90))

common_title <- sprintf(
  "Unsupervised clustering %s, %d cells",
  fetal_kidney@meta.data$orig.ident[1],
  ncol(fetal_kidney)
)

# Left column: UMAP above the bar plot; right column: heatmap.
show(
  (((p1 / p3) + plot_layout(heights = c(3, 2)) | p2)) +
    plot_layout(widths = c(1, 1)) +
    plot_layout(heights = c(3, 1)) +
    plot_annotation(title = common_title)
)
## R version 4.4.1 (2024-06-14)
## Platform: aarch64-unknown-linux-gnu
## Running under: Ubuntu 22.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/aarch64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/aarch64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Etc/UTC
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] patchwork_1.2.0 lubridate_1.9.3 forcats_1.0.0 stringr_1.5.1
## [5] dplyr_1.1.4 purrr_1.0.2 readr_2.1.5 tidyr_1.3.1
## [9] tibble_3.2.1 ggplot2_3.5.1 tidyverse_2.0.0 SCpubr_2.0.2
## [13] Azimuth_0.5.0 shinyBS_0.61.1 Seurat_5.1.0 SeuratObject_5.0.2
## [17] sp_2.1-4 optparse_1.7.5
##
## loaded via a namespace (and not attached):
## [1] fs_1.6.4 ProtGenerics_1.36.0
## [3] matrixStats_1.3.0 spatstat.sparse_3.1-0
## [5] bitops_1.0-8 DirichletMultinomial_1.46.0
## [7] TFBSTools_1.42.0 httr_1.4.7
## [9] RColorBrewer_1.1-3 tools_4.4.1
## [11] sctransform_0.4.1 utf8_1.2.4
## [13] R6_2.5.1 DT_0.33
## [15] lazyeval_0.2.2 uwot_0.2.2
## [17] rhdf5filters_1.16.0 withr_3.0.1
## [19] gridExtra_2.3 progressr_0.14.0
## [21] cli_3.6.3 Biobase_2.64.0
## [23] spatstat.explore_3.3-2 fastDummies_1.7.4
## [25] EnsDb.Hsapiens.v86_2.99.0 shinyjs_2.1.0
## [27] labeling_0.4.3 sass_0.4.9
## [29] spatstat.data_3.1-2 ggridges_0.5.6
## [31] pbapply_1.7-2 yulab.utils_0.1.7
## [33] Rsamtools_2.20.0 R.utils_2.12.3
## [35] parallelly_1.38.0 limma_3.60.4
## [37] BSgenome_1.72.0 rstudioapi_0.16.0
## [39] RSQLite_2.3.7 gridGraphics_0.5-1
## [41] generics_0.1.3 BiocIO_1.14.0
## [43] vroom_1.6.5 crosstalk_1.2.1
## [45] gtools_3.9.5 ica_1.0-3
## [47] spatstat.random_3.3-1 googlesheets4_1.1.1
## [49] GO.db_3.19.1 Matrix_1.7-0
## [51] fansi_1.0.6 S4Vectors_0.42.1
## [53] abind_1.4-5 R.methodsS3_1.8.2
## [55] lifecycle_1.0.4 edgeR_4.2.1
## [57] yaml_2.3.10 SummarizedExperiment_1.34.0
## [59] rhdf5_2.48.0 SparseArray_1.4.8
## [61] Rtsne_0.17 grid_4.4.1
## [63] blob_1.2.4 promises_1.3.0
## [65] shinydashboard_0.7.2 crayon_1.5.3
## [67] pwalign_1.0.0 miniUI_0.1.1.1
## [69] lattice_0.22-6 cowplot_1.1.3
## [71] GenomicFeatures_1.56.0 annotate_1.82.0
## [73] KEGGREST_1.44.1 pillar_1.9.0
## [75] knitr_1.48 GenomicRanges_1.56.1
## [77] rjson_0.2.22 future.apply_1.11.2
## [79] codetools_0.2-20 fastmatch_1.1-4
## [81] leiden_0.4.3.1 glue_1.7.0
## [83] spatstat.univar_3.0-0 data.table_1.16.0
## [85] vctrs_0.6.5 png_0.1-8
## [87] spam_2.10-0 cellranger_1.1.0
## [89] gtable_0.3.5 poweRlaw_0.80.0
## [91] assertthat_0.2.1 cachem_1.1.0
## [93] xfun_0.47 Signac_1.14.0
## [95] S4Arrays_1.4.1 mime_0.12
## [97] pracma_2.4.4 survival_3.7-0
## [99] DElegate_1.2.1 gargle_1.5.2
## [101] RcppRoll_0.3.1 statmod_1.5.0
## [103] fitdistrplus_1.2-1 ROCR_1.0-11
## [105] nlme_3.1-166 bit64_4.0.5
## [107] RcppAnnoy_0.0.22 GenomeInfoDb_1.40.1
## [109] rprojroot_2.0.4 bslib_0.8.0
## [111] irlba_2.3.5.1 KernSmooth_2.23-24
## [113] SeuratDisk_0.0.0.9021 colorspace_2.1-1
## [115] seqLogo_1.70.0 BiocGenerics_0.50.0
## [117] DBI_1.2.3 tidyselect_1.2.1
## [119] bit_4.0.5 compiler_4.4.1
## [121] curl_5.2.2 hdf5r_1.3.11
## [123] DelayedArray_0.30.1 plotly_4.10.4
## [125] rtracklayer_1.64.0 scales_1.3.0
## [127] caTools_1.18.2 lmtest_0.9-40
## [129] rappdirs_0.3.3 digest_0.6.37
## [131] goftest_1.2-3 presto_1.0.0
## [133] spatstat.utils_3.1-0 rmarkdown_2.28
## [135] XVector_0.44.0 htmltools_0.5.8.1
## [137] pkgconfig_2.0.3 sparseMatrixStats_1.16.0
## [139] MatrixGenerics_1.16.0 highr_0.11
## [141] fastmap_1.2.0 ensembldb_2.28.1
## [143] rlang_1.1.4 htmlwidgets_1.6.4
## [145] UCSC.utils_1.0.0 shiny_1.9.1
## [147] farver_2.1.2 jquerylib_0.1.4
## [149] zoo_1.8-12 jsonlite_1.8.8
## [151] BiocParallel_1.38.0 R.oo_1.26.0
## [153] RCurl_1.98-1.16 magrittr_2.0.3
## [155] ggplotify_0.1.2 GenomeInfoDbData_1.2.12
## [157] dotCall64_1.1-1 Rhdf5lib_1.26.0
## [159] munsell_0.5.1 Rcpp_1.0.13
## [161] viridis_0.6.5 reticulate_1.38.0
## [163] stringi_1.8.4 zlibbioc_1.50.0
## [165] MASS_7.3-61 plyr_1.8.9
## [167] parallel_4.4.1 listenv_0.9.1
## [169] ggrepel_0.9.5 deldir_2.0-4
## [171] CNEr_1.40.0 Biostrings_2.72.1
## [173] splines_4.4.1 tensor_1.5
## [175] hms_1.1.3 locfit_1.5-9.10
## [177] BSgenome.Hsapiens.UCSC.hg38_1.4.5 igraph_2.0.3
## [179] spatstat.geom_3.3-2 RcppHNSW_0.6.0
## [181] reshape2_1.4.4 stats4_4.4.1
## [183] TFMPvalue_0.0.9 XML_3.99-0.17
## [185] evaluate_0.24.0 JASPAR2020_0.99.10
## [187] tzdb_0.4.0 httpuv_1.6.15
## [189] RANN_2.6.2 getopt_1.20.4
## [191] polyclip_1.10-7 future_1.34.0
## [193] SeuratData_0.2.2.9001 scattermore_1.2
## [195] xtable_1.8-4 restfulr_0.0.15
## [197] AnnotationFilter_1.28.0 RSpectra_0.16-2
## [199] later_1.3.2 googledrive_2.1.1
## [201] viridisLite_0.4.2 memoise_2.0.1
## [203] AnnotationDbi_1.66.0 GenomicAlignments_1.40.0
## [205] IRanges_2.38.1 cluster_2.1.6
## [207] timechange_0.3.0 globals_0.16.3